/******************************************************************************/
* Author: 	A.M. Birkenbach, M.-Y. Lee, & M.D. Smith
* Date: 	08/02/2023
* Project: 	Counterfactual Modeling of Multispecies Fisheries Outcomes Under 
*			Market-Based Regulation
* Purpose: 	Produce simulation datasets (validation: POSTasPOST; counterfactual: POSTasPRE)
* Inputs:	trip_limits_forsim.dta
*			prices2.dta
*			fuel_prices.dta
*			BLS_QCEW_relevant_wages_clean.dta	
*			multipliers.dta
*			actual_post_qtykept.dta
*			weather_reanalysis.dta
*			fish_latslons.dta
*			scallop_start_of_season.dta	
*			trawl_survey_weight.dta
*			sectorACLs.dta
*			LA_permits_by_vessel_fy.dta			
*			data_for_discrete_choice_pre.dta
*			data_for_discrete_choice_post.dta
/******************************************************************************/

* ---- Session setup ----
* Start from an empty session and disable output paging.
clear all
set more off

* Directory roots prepended to every file path below.
* They are empty as written -- presumably to be set by the user before running.
global input ""
global working ""
global output ""

* Close any log that is still open (ignore the error if none is), then open
* a fresh plain-text log in the output directory.
capture log close
quietly log using "$output\2__DATA_FOR_SIMULATIONS_log.txt", replace text

/************************************************************************/
/***** VALIDATION: PREDICTING POST PERIOD USING CS POLICY STRUCTURE *****/
/************************************************************************/
* Post-period (fishing year 2010 onward) multipliers, averaged by gear,
* vessel, stock pairing, month and fishing year, then reshaped so each
* secondary stock (spstock2) becomes its own pair of l_ and c_ columns.
use "$input\multipliers.dta", clear
keep if gffishingyear>=2010
collapse (mean) *multiplier, by(gearcat hullnum spstock2* month gffishingyear)
rename (landing_multiplier catch_multiplier) (l_ c_)
reshape wide l_ c_, i(gearcat hullnum spstock2_prim month gffishingyear) j(spstock2) string
* after the reshape the primary stock is the row identifier; give it the plain name
rename spstock2_prim spstock2
quietly save "$working\postmultipliers.dta", replace

* Validation dataset: post-period discrete-choice data with observed
* quantities kept and the trip limits attached.
use "$input\data_for_discrete_choice_post.dta", clear
* remove any existing qtykept so the merged-in version is the one kept
capture drop qtykept
merge 1:1 hullnum gearcat spstock date using "$input\actual_post_qtykept.dta", ///
	keep(master match)
drop _merge
* rows with no quantity after the merge are set to zero
replace qtykept=0 if qtykept==.
merge m:1 date using "$input\trip_limits_forsim.dta", keep(master match) nogenerate
compress
quietly save "$working\data_for_simulations_POSTasPOST.dta", replace



/***************************************************************************************/
/***** COUNTERFACTUAL: PREDICTING POST PERIOD IF GROUNDFISH HAD REMAINED UNDER DAS *****/
/***************************************************************************************/
*set up pre multipliers with post prices

* Step 1: average the pre-2010 multipliers by gear, vessel, stock pairing and month.
use "$input\multipliers.dta" if gffishingyear<2010, clear
collapse (mean) *multiplier, by(gearcat hullnum spstock2* month)
quietly save "$working\temp_premultipliers.dta", replace

* Step 2: take the post-period rows, discard their own multipliers, and
* attach the pre-period monthly averages instead. Every row must match.
use "$input\multipliers.dta" if gffishingyear>=2010, clear
drop *multiplier
merge m:1 gearcat hullnum spstock2 spstock2_prim month ///
	using "$working\temp_premultipliers.dta", keep(master match)
assert _merge==3
drop _merge

* Step 3: date each monthly row at the first of its month and build a panel id.
generate date_=mdy(month,1,calyear)
egen group=group(hullnum spstock2*)
drop month gffishingyear
compress
* Expand the monthly multipliers to a daily panel and convert them to dollars,
* one calendar year at a time. Each year's result is written to a temporary
* file; the local `years' is reused after the loop to append those files.
levelsof calyear, loc(years)
foreach year in `years' {
	preserve
	keep if calyear==`year'
	*copy the December row to Dec 31 so the fill below runs through year end
	expand 2 if date_==mdy(12,1,`year'), gen(expand)
	replace date_=mdy(12,31,`year') if expand==1
	drop expand
	*for 2016 also copy the April row to Apr 30 so April is filled to month end
	if `year'==2016 {
		expand 2 if date_==mdy(4,1,`year'), gen(expand)
		replace date_=mdy(4,30,`year') if expand==1
		drop expand
	}
	*insert a row for every missing day in every group
	tsset group date_, d
	tsfill, full
	*sort on date_ by its full name: no variable called "date" exists yet,
	*so the short form only worked through variable-name abbreviation
	sort group date_
	*carry each group's values forward into the newly inserted days
	foreach var of varlist sppname hullnum gearcat spstock* *_multiplier calyear aceprice {
		qui replace `var'=`var'[_n-1] if group==group[_n-1] & missing(`var')==1
	}
	*merge in lagged prices
	*(date is the previous day, so the matched price is a one-day lag)
	gen date=date_-1
	replace sppname="OTHER_"+gearcat if sppname=="OTHER"
	merge m:1 date sppname using "$input\prices2.dta", ///
		keep(1 3) keepusing(price_lb_deflated)
	assert _merge==3
	gen price_lb_lag1=price_lb_deflated
	drop price_lb_deflated date _merge sppname
	rename date_ date
	
	*convert multipliers to dollars
	*(quota price is zeroed for everything except SeaScallop)
	replace aceprice=0 if spstock2~="SeaScallop"
	gen catch_multiplier_dollars=catch_multiplier*aceprice
	gen landing_multiplier_dollars=landing_multiplier*price_lb_lag1
	*sum over secondary stocks to one row per primary stock, day, vessel and gear
	collapse (sum) *_dollars, by(spstock2_prim date hullnum gearcat)
	qui save "$working\temp_multipliers_dollars_`year'.dta", replace
	restore
}
* Stack the yearly dollar-multiplier files into one dataset, deleting each
* temporary file once it has been appended.
clear
foreach yr of local years {
	append using "$working\temp_multipliers_dollars_`yr'.dta"
	quietly erase "$working\temp_multipliers_dollars_`yr'.dta"
}
rename spstock2_prim spstock2
* sanity check: no quota charge in dollars before 1 March 2010
assert catch_multiplier_dollars==0 if date<mdy(3,1,2010)
quietly save "$working\multipliers_dollars_POSTasPRE.dta", replace

* Reshape the pre-period monthly averages so each secondary stock gets its
* own l_ and c_ columns, save the result, and remove the temporary file.
use "$working\temp_premultipliers.dta", clear
rename (landing_multiplier catch_multiplier) (l_ c_)
reshape wide l_ c_, i(gearcat hullnum spstock2_prim month) j(spstock2) string
rename spstock2_prim spstock2
quietly save "$working\premultipliers.dta", replace
quietly erase "$working\temp_premultipliers.dta"

*save q's and crew, trip length, and DAS charge averages from pre data
* Monthly means by gear, vessel and stock from the pre-period data.
use "$input\data_for_discrete_choice_pre.dta", clear
collapse (mean) crew trip_days trip_hours das_charge q emean, by(gearcat hullnum spstock month)

* Make every gear-vessel-stock cell cover every month, then copy the
* identifiers into the rows that fillin created.
egen group=group(gearcat hullnum spstock)
fillin group month
gsort group -gearcat
qui bysort group: carryforward gearcat hullnum spstock, replace
* report remaining missing values before they are filled below
mdesc
drop group _fillin

* Months with no observations take the cell's mean over its observed months.
foreach v of varlist crew trip_* das_charge q emean {
	tempvar cellavg
	bysort gearcat hullnum spstock: egen `cellavg'=mean(`v')
	replace `v'=`cellavg' if `v'==.
	drop `cellavg'
}

* round crew to one decimal place and trip days to two
replace crew=round(crew,.1)
replace trip_days=round(trip_days,.01)
quietly save "$working\temp_precrewtriplength.dta", replace
	
use "$input\data_for_discrete_choice_post.dta", clear
*drop actual post values
drop nofish id log* h_hat emean q exp_rev* *multiplier* _j

*make sure data are rectangular by hullnum, spstock (within gear), date
*fill in mvs as needed
egen group=group(gearcat hullnum spstock)
tsset group date
tsfill, full

* time-invariant descriptors: copy forward in time, then backward (by
* sorting on the negated date), so every inserted row is populated
global cfvars "gearcat hullnum* spstock* len stateabb start_lat start_lon cf_lndlb_livlb spp*"
quietly bysort group (date): carryforward $cfvars, replace
generate negdate=-date
quietly bysort group(negdate): carryforward $cfvars, replace
sort group date
drop group negdate
* none of the descriptors may still be missing
foreach v of varlist $cfvars {
	assert !missing(`v')
}

*re-encode spstock2--will now have the same number for NoFish option for GILLNETS and TRAWL
encode spstock2, gen(_j)
order $cfvars _j

* inserted rows have no recorded choice: treat as not chosen
replace choice=0 if choice==.
replace choice_prev_fish=0 if choice_prev_fish==.

*fill in NoFish=1 for dates that are filled in (vessels that dropped out) for comparison purposes
tempvar nchosen
bysort hullnum2 date: egen `nchosen'=total(choice)
replace choice=1 if spstock2=="NoFish" & `nchosen'==0
drop `nchosen'
*month, year, gffishingyear, post, groundfish indicator
* recompute the calendar fields from date so inserted rows are populated
replace month=month(date)
replace year=year(date)
* months before May are assigned to the previous fishing year
replace gffishingyear=cond(month<5, year(date)-1, year(date))
replace post=(gffishingyear>=2010)
assert post==1
* fill a missing groundfish flag with the largest value seen for that stock
tempvar gfmax
bysort spstock: egen `gfmax'=max(gf)
replace gf=`gfmax' if gf==.
drop `gfmax'
*pre period average crew, trip lengths, das charges
* replace the post-period values with the pre-period monthly averages;
* every row must find a match, after which the temporary file is deleted
drop crew trip_days trip_hours das_charge
merge m:1 gearcat hullnum spstock month using ///
	"$working\temp_precrewtriplength.dta", keep(master match)
assert _merge==3
drop _merge
quietly erase "$working\temp_precrewtriplength.dta"

*prices
* the "OTHER" species code and name are made gear-specific before matching
replace sppcode="OTHER_"+gearcat if sppcode=="OTHER"
replace sppname="OTHER_"+gearcat if sppname=="OTHER"
merge m:1 date sppcode sppname using ///
	"$input\prices2.dta", keep(master match)
assert _merge==3 if spstock~="No_Fish"
drop _merge
* overwrite price and its sd with the deflated series, then drop the source columns
replace price_lb=price_lb_deflated
assert price_lb~=. if spstock~="No_Fish"
drop price_lb_deflated
replace price_lb_sd=price_lb_sd_deflated
assert price_lb_sd~=. if spstock~="No_Fish"
drop price_lb_sd_deflated

* one-day lagged price: match on the previous day's date
rename date date_
generate date=date_-1
merge m:1 date sppcode sppname using "$input\prices2.dta", ///
	keep(master match) keepusing(price_lb_deflated)
assert _merge==3 if spstock~="No_Fish"
replace price_lb_lag1=price_lb_deflated
drop price_lb_deflated date _merge
rename date_ date
drop spp*
*fuelprices
* states for which state-specific deflated series are used below
local states "CT MA ME NH NJ NY RI"
merge m:1 date using "$input\fuel_prices.dta", keep(master match)
assert _merge==3
drop _merge
* each row takes the deflated fuel price of its own state
foreach st of local states {
	replace fuelprice=fuelprice_deflated`st' if stateabb=="`st'"
}
assert fuelprice~=.
drop fuelprice_deflated??

*opportunity cost of time
merge m:1 month year using "$input\BLS_QCEW_relevant_wages_clean.dta", keep(master match)
assert _merge==3
drop _merge
* weighted avg of captain and crew: one captain wage plus (crew-1) crew wages, divided by crew
foreach st of local states {
	replace wkly_crew_wage= ///
		(avg_wkly_wage_deflated`st'CAPTAIN+(avg_wkly_wage_deflated`st'CREW*(crew-1)))/(crew) ///
		if stateabb=="`st'"
}
* rows still missing take the mean over the same gear, vessel and date
tempvar wagemean
bysort gearcat hullnum date: egen `wagemean'=mean(wkly_crew_wage)
replace wkly_crew_wage=`wagemean' if wkly_crew_wage==.
assert wkly_crew_wage~=.
drop avg_wkly_wage_deflated??CREW avg_wkly_wage_deflated??CAPTAIN `wagemean'
*trawl survey weights
* re-merge by stock and fishing year; required for every stock except Other and NoFish
drop trawl_survey_weight
merge m:1 spstock2 gffishingyear using "$input\trawl_survey_weight.dta", ///
	keep(master match) nogenerate
assert trawl_survey_weight~=. if !inlist(spstock2,"Other","NoFish")

*sector ACLs
* re-merge by stock and fishing year; required for all groundfish rows
drop tac
merge m:1 spstock2 gffishingyear using "$input\sectorACLs.dta", ///
	keep(master match) nogenerate
assert tac~=. if gf==1

*primary & secondary
replace primary=1
replace secondary=0

*log vars
foreach x of varlist crew trip_days trip_hours trawl_survey_weight {
	generate log_`x'=ln(`x')
}

* report missing values among the fishing alternatives
mdesc log_crew log_trip_days log_trawl_survey_weight month ///
	gffishingyear primary secondary q emean if spstock~="No_Fish"

		
*****vars that remain as they are:
*len
*fuelprice 							(re-merge to fill in mvs)
*fuelprice_len 						(re-gen to fill in mvs)
*wkly_crew_wage 					(re-merge to fill in mvs)
*start_of_season 					(re-gen to fill in mvs)
*alt-specific constants


*****change the rest
*das_price_mean & das_price_mean_len
* das_price_mean becomes the predicted DAS price; where the prediction is
* missing, that date's mean prediction is used instead
bysort date: egen mean_das_price_hat=mean(das_price_hat)
replace das_price_mean=cond(das_price_hat==., mean_das_price_hat, das_price_hat)
drop *das_price_hat


*distance & fuelprice_distance
	*fish_lat and fish_lon are dropped, but still have starting lats and lons
	*re-merge fish_lat and fish_lon in using pre-period centroids and recalc distances
*drop each variable separately: a captured drop of a list removes nothing
*when any one listed variable is absent, which would leave stale copies
*that the merge below does not overwrite
foreach v in fish_lat fish_lon distance {
	cap drop `v'
}
*temporarily flag rows as pre-period so the merge picks up pre-period centroids
replace post=0
merge m:1 gearcat spstock month post using ///
	"$input\fish_latslons.dta", keep(1 3)
assert _merge==3 if spstock~="No_Fish"
drop _merge
replace post=1
*captured so this neither fails when distance is already gone nor leaves a
*copy behind for geodist to collide with
cap drop distance
*distance in miles between the stock centroid and the starting position
geodist fish_lat fish_lon start_lat start_lon, gen(distance) mi
assert distance~=. if spstock~="No_Fish"
*mean_wind
*replace every wind variable with the values in the weather file
drop *wind*
merge m:1 date gearcat spstock using ///
	"$input\weather_reanalysis.dta", keep(1 3)
assert _merge==3 if spstock~="No_Fish"
drop _merge


*partial_closure
* show which stocks currently have a partial closure flagged
tab spstock if partial_closure==1
*unlikely that gf vessels fishing for SMB-herring could influence closures, so leave alone
*for the rest, we know of spillover stories, so assume stayed open?
*or bookend by leaving as-is for one run and leaving open for another
* every stock other than SMB-herring is set to open ...
replace partial_closure=0 if spstock~="Squid_Mackerel_Butterfish_Herring"
* ... and any value still missing is also set to open
replace partial_closure=0 if partial_closure==.


*permitted & LApermit
*drop each variable separately: a captured drop of both names removes
*neither when one is absent, and a surviving master copy would take
*precedence over the FY2009 values merged in below
foreach v in permitted LApermit {
	cap drop `v'
}
	*LA: use FY2009 holdings (lapsed permits are not freely re-obtained)
	*assume always permitted for gf since all these vessels fished gf both pre and post
*set the true fishing year aside and match every row to fishing year 2009
rename gffishingyear gffishingyear_
gen gffishingyear=2009
merge m:1 hullnum gffishingyear spstock using ///
	"$input\LA_permits_by_vessel_fy.dta", keep(1 3)
foreach var of varlist permitted LApermit {
	replace `var'=1 if gf==1 | spstock=="No_Fish"
}
*list the stocks that still have no permit information
tab spstock if permitted==.
tab spstock if LApermit==.
	*OA: Skates, spiny dogfish (and SMB but not herring) open access
	*assume all vessels permitted to fish for OA stuff
replace permitted=1 if inlist(spstock,"Spiny_Dogfish","Skates")
replace LApermit=0 if inlist(spstock,"Spiny_Dogfish","Skates")
assert permitted~=. & LApermit~=.
drop _merge
label var permitted "Open-access/permit to fish"
label var LApermit "Limited-access permit to fish"
*restore the true fishing year
drop gffishingyear
rename gffishingyear_ gffishingyear

*generate start-of-season indicator (first two months of each fishery's season)
*SMB-Herring=Jan 1
*GF, Hake, Spiny Dogfish, Skates, Monkfish, Lobster=May 1
*Scallop=Mar 1 in general
*for Other just set as 0
cap drop start_of_season
tab spstock if gf==0
*for default (unmerged hullnums below), follow the GC/IFQ pattern:
	*May-June for 2004-2008 inclusive
	*March-April for 2009-2015 inclusive
gen start_of_season=0
replace start_of_season=1 if spstock=="Sea_Scallop" ///
	& inlist(month,5,6) & year(date)<2009
replace start_of_season=1 if spstock=="Sea_Scallop" ///
	& inlist(month,3,4) & year(date)>=2009
*vessel-specific scallop season starts override the default where available
merge m:1 hullnum date spstock using ///
	"$input\scallop_start_of_season.dta", ///
	keep(1 3) keepusing(sos)
replace start_of_season=sos if _merge==3
drop _merge sos
tab month gffishingyear if spstock=="Sea_Scallop"
*May 1 fisheries: flag May and June
replace start_of_season=1 if gf==1 & ///
	inlist(month,5,6)
*these stocks also start May 1 (see list above), so they get the same
*May-June window as groundfish; months 3 and 4 are the scallop window
replace start_of_season=1 if ///
	inlist(spstock,"Skates","Spiny_Dogfish","Monkfish","American_Lobster","Red_Silver_Offshore_Hake") ///
	& inlist(month,5,6)
*Jan 1 fishery: flag January and February
replace start_of_season=1 if spstock== ///
	"Squid_Mackerel_Butterfish_Herring" & ///
	inlist(month,1,2)
replace start_of_season=0 if inlist(spstock,"Other","No_Fish")
*centroids were only needed for the distance calculation
drop fish_lat fish_lon

*merge in multipliers in dollar terms (pre-collapsed)
merge 1:1 date hullnum spstock2 using "$working\multipliers_dollars_POSTasPRE.dta", ///
	keep(master match)
* every fishing alternative must have found its multipliers
assert _merge==3 if spstock~="No_Fish"
drop _merge


*zero out disc choice alt-specific (not case) vars for no-fish option (leave anything that will go in first nest as-is, e.g., noreast_wind, fuelprice)
* NOTE(review): "permit" presumably resolves by abbreviation to permitted -- confirm no variable is literally named permit
* NOTE(review): noreast_wind is in the zeroed list although the comment above says to leave it as-is -- confirm intent
local altvars distance price* *trip_hours *trip_days gf *multiplier* q m*_wind noreast_wind permit LApermit crew
foreach v of varlist `altvars' {
	replace `v'=0 if spstock=="No_Fish"
}
* logged variables are set to missing rather than zero for the no-fish option
* (this runs second, so it overrides the zeros just written to log_trip_*)
foreach v of varlist log_* {
	replace `v'=. if spstock=="No_Fish"
}

*rescale some vars
* distance in hundreds, weekly wage in thousands
replace distance=distance/100
replace wkly_crew_wage=wkly_crew_wage/1000

*generate interaction terms (can't use factor vars on non-integer values)
* existing interaction columns are refreshed with the rebuilt components
replace fuelprice_len=fuelprice*len
replace fuelprice_distance=fuelprice*distance
replace distance_len=distance*len
* length interactions are rebuilt from scratch (any old copy is dropped first)
foreach base of varlist das_charge das_price_mean mean_wind max_wind {
	capture drop `base'_len
	generate `base'_len=`base'*len
}
generate mean_wind_noreast=mean_wind*noreast_wind

*label vars
* choice outcome and identifiers
label variable choice "Actual choice"
label variable choice_prev_fish "Previous targeting choice"
label variable gearcat "Gear category"
label variable gffishingyear "Groundfish fishing year"

* vessel characteristics and catchability
label variable len "Vessel length (100s ft)"
label variable crew "Number of crew"
label variable q "Catchability coefficient/vessel FE"

* prices, wages and multipliers
label variable price_lb "Price/lb (deflated)"
label variable price_lb_lag1 "Price/lb (deflated), one-day lag"
label variable fuelprice "Fuel price (deflated)"
label variable wkly_crew_wage "Average weekly wages for non-crew work (in $1000s)"
label variable catch_multiplier_dollars "Catch multiplier (for quota charges)"
label variable landing_multiplier_dollars "Landings multiplier (for revenues)"

* regulation and season
label variable das_charge "DAS charge"
label variable permitted "Open-access/permit to fish"
label variable LApermit "Limited-access permit to fish"
label variable partial_closure "Partial closure"
label variable start_of_season "Start of fishing season indicator (months 1 and 2)"

* distance and weather
label variable distance "Distance (100s of miles) from port to month-specific stock area"
label variable mean_wind "Avg. wind speed (m/s)"
label variable max_wind "Max wind speed (m/s)"

* interaction terms
label variable das_charge_len "DAS charge*vessel length (100s ft)"
label variable das_price_mean_len "DAS price*vessel length (100s ft)"
label variable fuelprice_len "Fuel price*vessel length (100s ft)"
label variable fuelprice_distance "Fuel price*distance (100s of miles)"
label variable distance_len "Distance (100s of miles)*length (100s ft)"
label variable mean_wind_len "Avg. wind speed*length"
label variable max_wind_len "Max wind speed*length"
label variable mean_wind_noreast "Avg. wind speed*northeast"

* one id per vessel-day, i.e. per choice occasion
egen id=group(hullnum2 date)
* final report of missing values in the listed variables
mdesc choice fuelprice_distance distance partial_closure ///
	mean_wind mean_wind_noreast permitted LApermit choice_prev_fish ///
	wkly_crew_wage len fuelprice fuelprice_len start_of_season id spstock2

* attach observed quantities kept (zero where there is no match) and trip limits
capture drop qtykept
merge 1:1 hullnum gearcat spstock date using "$input\actual_post_qtykept.dta", keep(master match)
drop _merge
replace qtykept=0 if qtykept==.
merge m:1 date using "$input\trip_limits_forsim.dta", keep(master match) nogenerate
compress
quietly save "$working\data_for_simulations_POSTasPRE.dta", replace

quietly log close


*end of do-file